PsL Monthly 1993 December

home *** CD-ROM | disk | FTP | other *** search

/ PsL Monthly 1993 December / PSL Monthly Shareware CD-ROM (December 1993).iso / prgmming / dos / c / tagsgen.exe / CTAG.C < prev next >

Wrap

C/C++ Source or Header | 1992-03-28 | 74KB | 2,057 lines

/* EPSHeader File: ctag.c Author: J. Kercheval Created: Sun, 07/14/1991 17:24:44 */ /* EPSRevision History J. Kercheval Sat, 07/27/1991 22:08:04 creation J. Kercheval Sun, 08/18/1991 20:58:13 completion of CGetToken() J. Kercheval Wed, 08/21/1991 22:34:49 place function recognition J. Kercheval Wed, 08/21/1991 23:11:17 add defines and macros J. Kercheval Wed, 08/21/1991 23:54:33 add typedef and class parsing J. Kercheval Thu, 08/22/1991 23:53:51 add global variables J. Kercheval Thu, 08/22/1991 23:54:05 add enum, struct, union J. Kercheval Thu, 08/22/1991 23:54:28 add globals via typedefs J. Kercheval Sun, 08/25/1991 23:09:28 complete semantic parser J. Kercheval Tue, 08/27/1991 23:28:34 fix bug in typedef, struct, enum and union declarations J. Kercheval Sat, 08/31/1991 23:58:03 add prototype parsing J. Kercheval Tue, 09/03/1991 22:28:55 move many macros to functions J. Kercheval Tue, 09/03/1991 23:05:34 clean code and consolidate to functions J. Kercheval Wed, 09/04/1991 00:16:21 add GNU tag output format support J. Kercheval Sun, 09/08/1991 13:24:53 minor bug fix in function and global variable parser J. Kercheval Sun, 09/08/1991 21:31:06 fix bug in lexical parser J. Kercheval Mon, 09/09/1991 21:49:19 fix bug in function parser J. Kercheval Mon, 09/09/1991 22:39:12 fix buf in define parser J. Kercheval Tue, 09/10/1991 22:06:09 fix typedef parser J. Kercheval Wed, 09/11/1991 02:04:48 add extern symbol recognition J. Kercheval Wed, 09/11/1991 19:49:11 fix bug in function pointer variable declaration J. Kercheval Wed, 09/11/1991 20:38:13 add support for function pointer variable declarations after first declaration J. Kercheval Wed, 09/11/1991 21:51:37 move #directive parsing between semantic and lexical parser J. Kercheval Thu, 09/12/1991 22:44:43 add support for #ifdef blocks to avoid unmatched parens in ToLevelZero parsing J. Kercheval Wed, 09/18/1991 22:05:02 fix bug in GetToken and DiscardLine J. 
Kercheval Thu, 09/19/1991 22:26:09 fix bug in lexical parser when parsing non C syntax files J. Kercheval Thu, 10/03/1991 18:15:10 add support for Static declarations J. Kercheval Fri, 10/04/1991 11:13:23 add support for tagging enumeration constants J. Kercheval Mon, 10/07/1991 09:36:07 create CParseEnumerationConstants() J. Kercheval Tue, 11/12/1991 21:46:25 add junk filter on token output J. Kercheval Sat, 03/28/1992 13:50:06 fix a few bugs and add extern "C" parsing */ #include <string.h> #include "ctag.h" #include "tagio.h" #include "log.h" #define CBUFSIZE 4096 #define MAX_TOKEN_LENGTH 4096 /* function for determining if character is whitespace */ #define IsWhite(c) ( _C_white_table[c] ) /* the indexed table for white space character lookup */ BOOLEAN _C_white_table[256]; /* list of whitespace characters */ char C_white[] = " \f\t\v\n\r"; /* function for determining if character is a delimiter */ #define IsDelim(c) ( _C_delim_table[c] ) /* the indexed table for token delimiter lookup */ BOOLEAN _C_delim_table[256]; /* list of token delimiters */ char C_delim[] = " \f\t\v\n\r\"[](){}#;:,.'=-+*/%&|^~!<>?"; /* function for determining if character is a puncuator */ #define IsPunctuator(c) ( _C_punctuator_table[c] ) /* the indexed table for punctuator character lookup */ BOOLEAN _C_punctuator_table[256]; /* list of punctuators */ char C_punctuator[] = "[](){},;="; /* * symbol type information is tied to the switches in flags in CTags(). 
This * enum is used to denote the type of the current tag for determining where * the appropriate name is located */ enum SymbolTypeEnum { NOP, Function, ProtoType, Structure, TypeDefinition, Macro, Enumeration, EnumerationConstant, Union, GlobalVariable, Class, Define, Extern, Static }; /* convenient definition */ typedef enum SymbolTypeEnum SymbolType; /* the current file buffer state */ typedef struct BufferStruct { char Cbuf[CBUFSIZE + 1]; /* input buffer for get_token routine */ char *buffer; /* current index into the pointer */ long int token_char_location; /* current token char location */ long int token_line_location; /* current token line in buffer */ long int token_line_offset; /* offset of current line */ FILE *infile; } Buffer; /* the current input token state */ typedef struct TokenStruct { char sbuf1[MAX_TOKEN_LENGTH]; /* the first token buffer */ long int charloc1; /* the char location of sbuf1 */ long int tokenline1; /* the line number of sbuf1 */ long int lineoffset1; /* the line offset of sbuf1 */ char sbuf2[MAX_TOKEN_LENGTH]; /* the second token buffer */ long int charloc2; /* the char location of sbuf2 */ long int tokenline2; /* the line number of sbuf2 */ long int lineoffset2; /* the line offset of sbuf2 */ char *cur_token; /* pointer to the current token buffer */ long int *cur_char_location;/* the location of current token */ long int *cur_token_line; /* the line of the current token */ long int *cur_line_offset; /* the line offset of the current token */ char *prev_token; /* pointer to the last token buffer */ long int *prev_char_location; /* the location of previous token */ long int *prev_token_line; /* the line of the previous token */ long int *prev_line_offset; /* the line offset of the previous token */ int token_count; /* tokens seen since last */ int else_nesting_level; /* levels deep in #else/#elif nest */ BOOLEAN extern_active; /* minor state for this statement */ BOOLEAN CPP_extern_active; /* minor state for this statement */ 
BOOLEAN static_active; /* minor state for this statement */ } Token; /*---------------------------------------------------------------------------- * * CParserInit() initializes the tables required by the parser. The tables * used are a simple boolean index which are true if the character * corresponding to the index is a member of the associated table. * ---------------------------------------------------------------------------*/ void CParserInit() { char *s; int i; /* init the entire block to FALSE */ for (i = 0; i < 256; i++) { _C_delim_table[i] = FALSE; _C_white_table[i] = FALSE; _C_punctuator_table[i] = FALSE; } /* set the characters in the delim set to TRUE */ for (s = C_delim; *s; s++) { _C_delim_table[*s] = TRUE; } /* set the characters in the white set to TRUE */ for (s = C_white; *s; s++) { _C_white_table[*s] = TRUE; } /* set the characters in the punctuator set to TRUE */ for (s = C_punctuator; *s; s++) { _C_punctuator_table[*s] = TRUE; } } /*---------------------------------------------------------------------------- * * CSymbolWanted() returns true if flags are true for the symbol type passed * and false otherwise. 
The following mapping is done:
 *
 *      Flag          Type
 *      ---------     --------------
 *      flags->cf     Function
 *      flags->cp     ProtoType
 *      flags->cs     Structure
 *      flags->ct     TypeDefinition
 *      flags->cm     Macro
 *      flags->ce     Enumeration
 *      flags->ck     EnumerationConstant
 *      flags->cu     Union
 *      flags->cv     GlobalVariable
 *      flags->cc     Class
 *      flags->cd     Define
 *
 * Any other type (NOP, Extern, Static) is never wanted.
 *
 ---------------------------------------------------------------------------*/

BOOLEAN CSymbolWanted(SymbolType type, Flags * flags)
{
    BOOLEAN wanted = FALSE;

    switch (type) {
    case Function:
        if (flags->cf) wanted = TRUE;
        break;
    case ProtoType:
        if (flags->cp) wanted = TRUE;
        break;
    case GlobalVariable:
        if (flags->cv) wanted = TRUE;
        break;
    case Define:
        if (flags->cd) wanted = TRUE;
        break;
    case Macro:
        if (flags->cm) wanted = TRUE;
        break;
    case Structure:
        if (flags->cs) wanted = TRUE;
        break;
    case TypeDefinition:
        if (flags->ct) wanted = TRUE;
        break;
    case Enumeration:
        if (flags->ce) wanted = TRUE;
        break;
    case EnumerationConstant:
        if (flags->ck) wanted = TRUE;
        break;
    case Union:
        if (flags->cu) wanted = TRUE;
        break;
    case Class:
        if (flags->cc) wanted = TRUE;
        break;
    default:
        /* no command line switch exists for the remaining types */
        break;
    }

    return wanted;
}


/*----------------------------------------------------------------------------
 *
 * CTokenType() takes the token passed and determines if the token is a
 * special token.  Special tokens require specialized handling in the parser.
 * The function returns the type of token according to the SymbolTypeEnum
 * enumeration.  This routine can only tell so much from one symbol but will
 * return some type for all the *interesting* tokens.
Anything that is
 * loosely defined is given back with the closest type available and the
 * parser must give it contextual meaning
 *
 ---------------------------------------------------------------------------*/

SymbolType CTokenType(char *token)
{
    /* every keyword that needs special handling, with the type it maps to */
    static const struct {
        const char *keyword;
        SymbolType type;
    } special[] = {
        { "struct",  Structure },
        { "typedef", TypeDefinition },
        { "enum",    Enumeration },
        { "union",   Union },
        { "class",   Class },
        { "extern",  Extern },
        { "static",  Static }
    };
    size_t i;

    /* cheap rejection: all of the keywords start with one of these */
    if (strchr("cestu", token[0]) == NULL) {
        return NOP;
    }

    for (i = 0; i < sizeof special / sizeof special[0]; i++) {
        if (strcmp(token, special[i].keyword) == 0) {
            return special[i].type;
        }
    }

    /* do not recognize it as anything special */
    return NOP;
}


/*----------------------------------------------------------------------------
 *
 * CIsDeclarationToken() takes the token passed and determines if the token
 * is a declaration keyword used in C.  The user may define new declaration
 * keywords via use of the typedef keyword.  This alters the syntax of C.  If
 * the syntax is changed in this way it is probable that this routine would
 * not return the correct value.  For the standard uses of this routine that
 * information should not hinder performance for the vast majority of the
 * cases.
* ---------------------------------------------------------------------------*/

/* room for the longest keyword in the table below plus its terminator */
#define SYMBOL_SIZE 20

/*
 * Returns TRUE when token is exactly one of the keywords in the table and
 * FALSE otherwise (including for an empty token).
 *
 * The table is static const so that it is built once instead of being
 * copied onto the stack on every call.
 */
BOOLEAN CIsDeclarationToken(char *token)
{
    static const char token_list[][SYMBOL_SIZE] =
    {
        "*ivclsdfuaretp_hn\"",  /* list of starting characters of symbols
                                 * below */
        "*",                    /* pointer */
        "\"C\"",                /* C++ extern for C code */
        "int",                  /* integer declaration */
        "void",                 /* void type */
        "char",                 /* character */
        "long",                 /* long integer */
        "short",                /* short integer */
        "double",               /* double floating point */
        "float",                /* floating point */
        "signed",               /* signed integer */
        "unsigned",             /* unsigned integer */
        "auto",                 /* auto variable (local duration) */
        "register",             /* register variable */
        "static",               /* static variable */
        "struct",               /* structure define */
        "union",                /* union define */
        "enum",                 /* enum defined */
        "typedef",              /* type definition */
        "const",                /* constant variable */
        "extern",               /* external declaration */
        "class",                /* class declaration */
        "friend",               /* class modifier */
        "private",              /* class modifier */
        "protected",            /* class modifier */
        "public",               /* class modifier */
        "volatile",             /* Compiler warning */
        "_based",               /* pointer type */
        "_cdecl",               /* parameter calling sequence, C style */
        "cdecl",                /* parameter calling sequence, C style */
        "_far",                 /* pointer type */
        "far",                  /* pointer type */
        "_huge",                /* pointer type */
        "huge",                 /* pointer type */
        "_near",                /* pointer type */
        "near",                 /* pointer type */
        "_pascal",              /* parameter calling sequence, PASCAL style */
        "pascal",               /* parameter calling sequence, PASCAL style */
        "_fortran",             /* parameter calling sequence, FORTRAN style */
        "_fastcall",            /* parameter calling sequence, via registers */
        ""                      /* empty entry ends the list */
    };

    int index;

    /* an empty token can never be a keyword */
    if (token[0] == '\0')
        return FALSE;

    /* look for dirty rejection: entry 0 holds every possible first letter */
    if (!strchr(token_list[0], token[0]))
        return FALSE;

    /* march through array until membership is determined */
    for (index = 1; token_list[index][0] != '\0'; index++) {

        /* return true if token found */
        if (strcmp(token, token_list[index]) == 0) {
            return TRUE;
        }
    }

    /* did not find it */
    return FALSE;
}

/*----------------------------------------------------------------------------
 *
 * COutputToken() will output the previous token (token->prev_token) as a
 * tag of the given type.  Nothing is output when
 *
 *   - the token starts with a delimiter character (junk filter),
 *   - the type is not requested according to CSymbolWanted(),
 *   - the statement is extern, flags->cx is not set and the type is not
 *     Function, Define or Macro, or
 *   - the statement is static, flags->ci is not set and the type is not
 *     Define or Macro.
 *
 * When flags->og or flags->oe is set the full source line of the token is
 * read back from the input file and handed to OutputTag(); the file
 * position is restored afterwards.  Otherwise an empty line is passed.
 *
 ---------------------------------------------------------------------------*/

void COutputToken(Token * token, Buffer * token_buffer,
                  SymbolType token_type,
                  FILE * outfile, char *infname, Flags * flags)
{
    char line[MAX_TOKEN_LENGTH];    /* the source line for the tag output */
    long int old_offset;            /* the previous value of the file ptr */
    size_t line_length;             /* the length of the line */

    /* init */
    line[0] = '\0';

    /* filter any junk tags; cast keeps the table index non-negative */
    if (IsDelim((unsigned char) token->prev_token[0])) {
        return;
    }

    /* check that the symbol is wanted */
    if (!CSymbolWanted(token_type, flags)) {
        return;
    }

    /* return if external and externals not wanted */
    if (token->extern_active && !flags->cx &&
        token_type != Function &&
        token_type != Define &&
        token_type != Macro) {
        return;
    }

    /* return if static and statics are not wanted */
    if (token->static_active && !flags->ci &&
        token_type != Define &&
        token_type != Macro) {
        return;
    }

    /* if Epsilon or GNU output is specified then we need to output the
     * full line */
    if (flags->og || flags->oe) {

        /* store the current file offset, move to the line offset, read
         * the line into a buffer and restore the file offset */
        old_offset = ftell(token_buffer->infile);
        if (fseek(token_buffer->infile,
                  *(token->prev_line_offset), SEEK_SET)) {
            log_message("# COutputToken() -- internal error - continuing");
        }
        else {
            if (fgets(line, MAX_TOKEN_LENGTH,
                      token_buffer->infile) == NULL) {
                /* nothing was read: the buffer contents cannot be
                 * trusted, so fall back to an empty line */
                line[0] = '\0';
                log_message("# COutputToken() -- internal error - continuing");
            }

            /* strip the trailing newline; the length test avoids reading
             * line[-1] when the line is empty */
            line_length = strlen(line);
            if (line_length > 0 && line[line_length - 1] == '\n') {
                line[line_length - 1] = '\0';
            }

            if (fseek(token_buffer->infile, old_offset, SEEK_SET)) {
                log_message("# COutputToken() -- internal error - continuing");
            }
        }
    }

    /* the recorded char location is the position just past the token, so
     * subtracting its length gives where the token starts */
    OutputTag(outfile, line, token->prev_token, infname,
              *(token->prev_token_line),
              *(token->prev_char_location) - strlen(token->prev_token),
              flags);
}

/*---------------------------------------------------------------------------- * * CGetToken() will obtain the next token in the line pointed to by lptr and * in addition will return FALSE if EOL is reached. This routine is passed * an inbut buffer (Cbuf) and a current pointer into the buffer. It is the * responsibility of this routine to refill the buffer if required. Quoted * strings and single quoted characters are returned as a single token. * Comments are completely ignored by this parser. The token will not exceed * max_token_length - 1 in length (not including the end of line delimiter) * ---------------------------------------------------------------------------*/ BOOLEAN CGetToken(FILE * infile, char **buffer, char *Cbuf, char *token, int max_token_length, long int *line_number, long int *char_number, long int *line_offset) { typedef enum parser_state { /* a state of the lexical parser */ Parse, BeginCommentMaybe, InComment, InCommentEndMaybe, InCPPComment, InQuoteNormal, InQuoteLiteral, InSingleQuoteNormal, InSingleQuoteLiteral, EndSingleQuote, WhiteSpace, Exit } State; State current_state; /* the current state of the parser */ char c; /* the current character being examined */ char *t; /* pointer into token */ int token_length; /* the current token length cannot exceed max * token length */ /* init */ current_state = WhiteSpace; t = token; *t = '\0'; token_length = 0; /* parse the file for the next token */ while (TRUE) { c = **buffer; /* if the buffer has been completely used, refill the buffer, I make * the tacit assumption here that the null character is not a member * of the source file */ if (!c) { *buffer = Cbuf; if (FillBuffer(infile, Cbuf, (long int) CBUFSIZE)) { c = **buffer; } else { /* return the token if it exists */ if (t != token) { *t = '\0'; return TRUE; } else return FALSE; } } /* react on the state machine */ switch (current_state) { case Parse: switch (c) { case '/': /* return if we already have a token */ if (t != token) { 
(*buffer)--; (*char_number)--; current_state = Exit; } else { /* this may be the begin if a comment or the * division symbol, read the next character after * verifying it the buffer doesn't need refilling */ current_state = BeginCommentMaybe; *t = c; } break; case '\"': /* return if we already have a token */ if (t != token) { (*buffer)--; (*char_number)--; current_state = Exit; } else { current_state = InQuoteNormal; *t++ = c; token_length++; } break; case '\'': /* return if we already have a token */ if (t != token) { (*buffer)--; (*char_number)--; current_state = Exit; } else { current_state = InSingleQuoteNormal; *t++ = c; token_length++; } break; default: /* if it is a delimiter than stop processing */ if (IsDelim(c)) { /* if a token exists then back up in buffer */ if (t != token) { (*buffer)--; (*char_number)--; } else { *t++ = c; token_length++; } current_state = Exit; } else { /* normal character, store it in the token */ *t++ = c; token_length++; } break; } break; case WhiteSpace: /* pass over whitespace, backup one char if no longer in * white space region */ if (!IsWhite(c)) { current_state = Parse; (*buffer)--; (*char_number)--; } else { /* check for newline */ if (c == '\n') { (*line_number)++; *line_offset = *char_number + *line_number; } } break; case BeginCommentMaybe: switch (c) { case '/': current_state = InCPPComment; break; case '*': current_state = InComment; break; default: t++; token_length++; (*buffer)--; (*char_number)--; current_state = Exit; break; } break; case InComment: switch (c) { case '*': /* this is potentially the end of the comment */ current_state = InCommentEndMaybe; break; case '\n': /* new line just increment state variables */ (*line_number)++; *line_offset = *char_number + *line_number; break; default: break; } break; case InCommentEndMaybe: switch (c) { case '/': /* this is indeed the end of the comment */ current_state = WhiteSpace; break; case '*': /* this is also perhaps the end of comment */ break; case '\n': /* new 
line just increment state variables */ (*line_number)++; *line_offset = *char_number + *line_number; default: /* still part of the current comment */ current_state = InComment; break; } break; case InCPPComment: if (c == '\n') { current_state = WhiteSpace; (*line_number)++; *line_offset = *char_number + *line_number; } break; case InQuoteNormal: switch (c) { case '\"': /* end of InQuoteNormal state */ current_state = Exit; break; case '\\': /* InQuoteLiteral state */ current_state = InQuoteLiteral; break; default: /* normal dull behavior */ break; } *t++ = c; token_length++; break; case InQuoteLiteral: /* this char is simply copied */ current_state = InQuoteNormal; *t++ = c; token_length++; break; case InSingleQuoteNormal: switch (c) { case '\\': /* InQuoteLiteral state */ current_state = InSingleQuoteLiteral; break; default: /* Just copy the character and move to close quote */ current_state = EndSingleQuote; break; } *t++ = c; token_length++; break; case InSingleQuoteLiteral: /* this char is simply copied */ current_state = EndSingleQuote; *t++ = c; token_length++; break; case EndSingleQuote: /* end of InSingleQuote states */ current_state = Exit; *t++ = c; token_length++; break; case Exit: *t = '\0'; return TRUE; break; default: /* not reached */ break; } /* if the token_length has gotten too large then return */ if (token_length == max_token_length - 1) { *t = '\0'; return TRUE; } /* move to the next buffer location */ (*buffer)++; (*char_number)++; } } /*---------------------------------------------------------------------------- * * CFillToken() will obtain the next lexical parser from the buffer and move * the token into the Token structure. TRUE is returned if the lexical * parser returns TRUE, otherwise FALSE is returned. 
* ---------------------------------------------------------------------------*/

/*
 * Reads the next token into token->cur_token via CGetToken().  When a token
 * was found, the buffer's char location, line number and line offset are
 * copied into the token's current-location slots.  Returns what CGetToken()
 * returned.
 */
BOOLEAN CFillToken(Token * token, Buffer * token_buffer)
{
    BOOLEAN got_token;

    got_token = CGetToken(token_buffer->infile,
                          &(token_buffer->buffer),
                          token_buffer->Cbuf,
                          token->cur_token,
                          MAX_TOKEN_LENGTH,
                          &(token_buffer->token_line_location),
                          &(token_buffer->token_char_location),
                          &(token_buffer->token_line_offset));

    /* nothing to record when the input ran dry */
    if (!got_token) {
        return got_token;
    }

    /* remember where this token was found */
    *(token->cur_char_location) = token_buffer->token_char_location;
    *(token->cur_token_line) = token_buffer->token_line_location;
    *(token->cur_line_offset) = token_buffer->token_line_offset;

    return got_token;
}


/*----------------------------------------------------------------------------
 *
 * CTokenSwap() exchanges the roles of the two token slots: what was the
 * current token (text, char location, line and line offset) becomes the
 * previous token and vice versa.  Only pointers are exchanged; no token
 * text is copied.
 *
 ---------------------------------------------------------------------------*/

void CTokenSwap(Token * token)
{
    /* set the old "previous" slot aside */
    char *text = token->prev_token;
    long int *char_location = token->prev_char_location;
    long int *line = token->prev_token_line;
    long int *line_offset = token->prev_line_offset;

    /* current becomes previous */
    token->prev_token = token->cur_token;
    token->prev_char_location = token->cur_char_location;
    token->prev_token_line = token->cur_token_line;
    token->prev_line_offset = token->cur_line_offset;

    /* and the slot set aside becomes current */
    token->cur_token = text;
    token->cur_char_location = char_location;
    token->cur_token_line = line;
    token->cur_line_offset = line_offset;
}


/*----------------------------------------------------------------------------
 *
 * CDiscardLine() will move past all the characters up to the next EOL that
 * is not preceded by a line continuation character.
This routine will * return TRUE if there was a '(' character as the first character. This * return value is useful for determining if #defines are macros or simple * defines. * ---------------------------------------------------------------------------*/ BOOLEAN CDiscardLine(FILE * infile, char **buffer, char *Cbuf, long int *line_number, long int *char_number, long int *line_offset) { char c; /* the current character being examined */ BOOLEAN line_continue; /* TRUE if line continuation true */ BOOLEAN is_macro; /* TRUE if the first delimiter char is '(' */ BOOLEAN first_char; /* TRUE when first character is active */ /* init */ c = '\0'; line_continue = FALSE; is_macro = FALSE; first_char = TRUE; /* loop until non continued EOL encountered */ do { c = **buffer; (*buffer)++; (*char_number)++; /* handle the newline */ if (c == '\n') { line_continue = FALSE; (*line_number)++; *line_offset = *char_number + *line_number - 1; } /* if the buffer has been completely used, refill the buffer, I make * the tacit assumption here that the null character is not a member * of the source file */ if (!c) { *buffer = Cbuf; if (FillBuffer(infile, Cbuf, (long int) CBUFSIZE)) { c = **buffer; (*char_number)--; } else { /* end of file reached */ return is_macro; } } if (c == '\\') line_continue = TRUE; if (first_char) { if (c == '(') is_macro = TRUE; first_char = FALSE; } } while (c != '\n' || line_continue); return is_macro; } /*---------------------------------------------------------------------------- * * CParseDefine() will parse macros and defines in standard C syntax and * distinguish between a macro and a define, if there is a punctuator '(' as * the first character after the token, then it is a macro. 
* ---------------------------------------------------------------------------*/ void CParseDefine(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { SymbolType tmptype; /* a temporay type variable */ BOOLEAN token_found; BOOLEAN is_macro; token_found = CFillToken(token, token_buffer); if (token_found) { /* save the previous values */ CTokenSwap(token); /* get rid of the rest of the line and return the define type */ is_macro = CDiscardLine(token_buffer->infile, &(token_buffer->buffer), token_buffer->Cbuf, &(token_buffer->token_line_location), &(token_buffer->token_char_location), &(token_buffer->token_line_offset)); /* react on the token */ if (is_macro) { tmptype = Macro; } else { tmptype = Define; } /* output the token */ COutputToken(token, token_buffer, tmptype, outfile, infname, flags); } } /*---------------------------------------------------------------------------- * * CParsePreprocessorDirective() will parse preprocessor directives in * standard C syntax * ---------------------------------------------------------------------------*/ void CParsePreprocessorDirective(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { BOOLEAN token_found; token_found = CFillToken(token, token_buffer); if (token_found) { /* deal with a define directive */ if (!strcmp(token->cur_token, "define")) { CParseDefine(token, token_buffer, outfile, infname, flags); } else { /* increment the else block level pointer */ if (!strcmp(token->cur_token, "else")) { token->else_nesting_level++; } else { /* decrement the else block level pointer */ if (!strcmp(token->cur_token, "endif")) { if (token->else_nesting_level) token->else_nesting_level--; } else { /* if an else has not already been seen then increment * the level */ if (!strcmp(token->cur_token, "elif")) { token->else_nesting_level++; } } } /* remove the rest of the directive line including line * continuation characters */ CDiscardLine(token_buffer->infile, 
&(token_buffer->buffer), token_buffer->Cbuf, &(token_buffer->token_line_location), &(token_buffer->token_char_location), &(token_buffer->token_line_offset)); } } } /*---------------------------------------------------------------------------- * * CNextToken() will obtain the next token in the buffer and update the * appropriate variables. * ---------------------------------------------------------------------------*/ BOOLEAN CNextToken(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { BOOLEAN token_found; BOOLEAN cycle; do { /* obtain the next token */ token_found = CFillToken(token, token_buffer); /* check for preprocessing directives and parse them if found */ if (token->cur_token[0] == '#' && token_found) { /* parse the directive and loop back to get another token */ CParsePreprocessorDirective(token, token_buffer, outfile, infname, flags); cycle = TRUE; } else { /* we found a token to pass to the semantic parser */ cycle = FALSE; } } while (cycle); /* return it */ return token_found; } /*---------------------------------------------------------------------------- * * CToLevelZero() will increment the nesting level and then parse tokens * until level zero has been reached again. If tokens are no longer * available this loop will stop. 
* ---------------------------------------------------------------------------*/

/*
 * Starts at nesting depth one, i.e. the caller has just seen an opening
 * '{', '[' or '('.  Tokens are read directly with CGetToken(), so the
 * token location fields are not updated here.  Preprocessor directives are
 * parsed as they are met.  Braces are only counted while
 * token->else_nesting_level is zero; the level is reset to zero on entry.
 */
void CToLevelZero(Token * token, Buffer * token_buffer,
                  FILE * outfile, char *infname, Flags * flags)
{
    static const char openers[] = "{[(";    /* open brace set */
    static const char closers[] = ")]}";    /* close brace set */
    int depth = 1;

    token->else_nesting_level = 0;

    while (depth != 0) {
        char first;

        /* running out of tokens ends the scan */
        if (!CGetToken(token_buffer->infile,
                       &(token_buffer->buffer),
                       token_buffer->Cbuf,
                       token->cur_token,
                       MAX_TOKEN_LENGTH,
                       &(token_buffer->token_line_location),
                       &(token_buffer->token_char_location),
                       &(token_buffer->token_line_offset))) {
            break;
        }

        first = token->cur_token[0];

        if (first == '#') {
            CParsePreprocessorDirective(token, token_buffer,
                                        outfile, infname, flags);
            continue;
        }

        /* only count open brace, parens and brackets within blocks of one
         * element of an ifdef code block */
        if (token->else_nesting_level) {
            continue;
        }

        if (strchr(openers, first)) {
            depth++;
        }
        else if (strchr(closers, first)) {
            depth--;
        }
    }
}


/*----------------------------------------------------------------------------
 *
 * CToPunctuator() will parse tokens until the next punctuator has been
 * reached.  If tokens are no longer available this loop will stop.  If this
 * loop is successful the found flag declared in the host routine will be
 * set.
* ---------------------------------------------------------------------------*/ BOOLEAN CToPunctuator(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { BOOLEAN punctuator_found; /* init and parse through until the first punctuator is found */ token->token_count = 0; punctuator_found = FALSE; while (!punctuator_found) { token->token_count++; CTokenSwap(token); if (!CNextToken(token, token_buffer, outfile, infname, flags)) { break; } else { if (IsPunctuator(token->cur_token[0])) punctuator_found = TRUE; } } /* return value */ return punctuator_found; } /*---------------------------------------------------------------------------- * * CParseParens() will move through a declaration in parentheses and place * the correct valid token as prev_token. This return TRUE if a '[' was seen * within the parens and false otherwise. * ---------------------------------------------------------------------------*/ BOOLEAN CParseParens(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { BOOLEAN token_found; BOOLEAN variable_seen; int brace_ignore = 1; token->else_nesting_level = 0; token_found = TRUE; variable_seen = FALSE; while (brace_ignore && token_found) { token_found = CNextToken(token, token_buffer, outfile, infname, flags); if (token_found && !token->else_nesting_level) { switch (token->cur_token[0]) { case '(': /* increment brace_ignore and continue */ brace_ignore++; break; case ')': /* just decrement brace_ignore if it is positive. If * brace ignore is not positive at this point then we * certainly have a syntax error. Ignore this fact if * so. 
*/ if (brace_ignore) { brace_ignore--; } break; case '[': /* move to end of array bounds */ variable_seen = TRUE; CToLevelZero(token, token_buffer, outfile, infname, flags); break; default: CTokenSwap(token); break; } } } return variable_seen; } /*---------------------------------------------------------------------------- * * COutputCommaDelimitedToken() will output a token and then parse the * statement until ';' or ',' is reached. The token is output if the passed * token type is requested from the command line. * ---------------------------------------------------------------------------*/ void COutputCommaDelimitedToken(Token * token, Buffer * token_buffer, SymbolType token_type, FILE * outfile, char *infname, Flags * flags) { char open_brace[] = "{[("; /* open brace set */ BOOLEAN punctuator_found; /* output the token */ COutputToken(token, token_buffer, token_type, outfile, infname, flags); /* go to the next list punctuator (',' or ';') */ punctuator_found = TRUE; while (token->cur_token[0] != ',' && token->cur_token[0] != ';' && punctuator_found) { if (strchr(open_brace, token->cur_token[0])) { CToLevelZero(token, token_buffer, outfile, infname, flags); } punctuator_found = CToPunctuator(token, token_buffer, outfile, infname, flags); } } /*---------------------------------------------------------------------------- * * CParseCommaDelimitedList() will parse a token list seperated by commas * until a ';' is found. The tokens are output if the passed type is * requested from the command line. 
* ---------------------------------------------------------------------------*/

/*
 * Each pass moves to the next punctuator.  If it is '(' the parenthesised
 * declarator is parsed first so that the declared name becomes prev_token.
 * For '(', '[', ',', ';' and '=' the previous token is output as a tag of
 * token_type and the statement is then skipped forward to the next ',' or
 * ';'.  Any other punctuator is passed over.  The loop ends at ';' or when
 * the tokens run out.
 */
void CParseCommaDelimitedList(Token * token, Buffer * token_buffer,
                              SymbolType token_type, FILE * outfile,
                              char *infname, Flags * flags)
{
    static const char openers[] = "{[(";    /* open brace set */
    BOOLEAN more = TRUE;                    /* FALSE once tokens run out */
    char lead;

    while (token->cur_token[0] != ';' && more) {

        more = CToPunctuator(token, token_buffer, outfile, infname, flags);
        if (!more) {
            break;
        }

        lead = token->cur_token[0];

        if (lead == '(') {
            /* this is an embedded variable declaration, either a complex
             * variable pointer or function pointer; pick out the internal
             * token and then treat it like the other endings */
            CParseParens(token, token_buffer, outfile, infname, flags);
        }
        else if (lead != '[' && lead != ',' && lead != ';' && lead != '=') {
            /* not a proper ending token for a declaration */
            continue;
        }

        /* output the declared name and parse to the next correct
         * punctuator */
        COutputToken(token, token_buffer, token_type,
                     outfile, infname, flags);

        while (token->cur_token[0] != ',' &&
               token->cur_token[0] != ';' &&
               more) {

            if (strchr(openers, token->cur_token[0])) {
                CToLevelZero(token, token_buffer, outfile, infname, flags);
            }
            more = CToPunctuator(token, token_buffer,
                                 outfile, infname, flags);
        }
    }
}


/*----------------------------------------------------------------------------
 *
 * CParseFunctionOrGlobalVariable() will parse a function, prototype or
 * global variable syntax.
* ---------------------------------------------------------------------------*/
/*
 * Within this file the routine is called when the current token is a '('.
 * The token preceding the '(' is remembered (text plus location data) unless
 * CIsDeclarationToken() recognizes it, so that it can be put back into
 * prev_token before a Function or ProtoType tag is written.
 *
 *   token         parser token state (cur/prev token buffers and locations)
 *   token_buffer  input buffer state, passed through to the helpers
 *   outfile       passed through to the helpers
 *   infname       passed through to the helpers
 *   flags         passed through to the helpers
 */
void CParseFunctionOrGlobalVariable(Token * token, Buffer * token_buffer,
                                     FILE * outfile, char *infname,
                                     Flags * flags)
{
    char buf[MAX_TOKEN_LENGTH]; /* saved copy of prev_token */
    long int charloc;           /* saved char location of prev_token */
    long int tokenline;         /* saved line number of prev_token */
    long int lineoffset;        /* saved line offset of prev_token */

    BOOLEAN token_found;        /* result of CNextToken() */
    BOOLEAN punctuator_found;   /* result of CToPunctuator() */
    BOOLEAN last_token_known;   /* prev_token is a declaration token */
    BOOLEAN variable_seen;      /* result of CParseParens() */

    /* init */
    buf[0] = '\0';
    charloc = 0;
    tokenline = 1;
    lineoffset = 0;

    /* save the previous token */
    last_token_known = CIsDeclarationToken(token->prev_token);
    if (!last_token_known) {

        /* If this is not a known token then it may be a function name. Save
         * it then look further at the syntax.  This also may be a symbol
         * previously defined via a typedef which alters the syntax of C/C++ */
        /* NOTE(review): unbounded copy; presumably prev_token never exceeds
         * MAX_TOKEN_LENGTH -- confirm against the Token declaration */
        strcpy(buf, token->prev_token);
        charloc = *(token->prev_char_location);
        tokenline = *(token->prev_token_line);
        lineoffset = *(token->prev_line_offset);
    }

    /* This is a function or prototype or global variable go to brace_ignore
     * level zero again. */
    variable_seen = CParseParens(token, token_buffer, outfile, infname, flags);

    /* Check to see if this is a function, prototype, or global variable.  If
     * the token is a ';' and last_token_known is false then we assume a
     * prototype.  Strange variable declarations may fool this, but not
     * likely.  If the character is a '(' then it is certainly a function or
     * prototype unless variable_seen is TRUE, then it is a variable.  If the
     * character is a '[', '=' or ',' then it is certainly a variable
     * declaration.  If the character is a ';' and last_token_known is true
     * then it is a variable declaration.  If the token is anything else then
     * it is a function. */
    token_found = CNextToken(token, token_buffer, outfile, infname, flags);
    if (token_found) {
        switch (token->cur_token[0]) {
            case ';':

                /* determine if a prototype or a variable declaration. if the
                 * last_token_known is true then it is a global variable.  If
                 * the token was a symbol defined by a typedef then this
                 * distinction is incorrect since typedef actually alters
                 * syntax.  This is correct for the large majority of cases
                 * since most do not enclose simple variable declarations in
                 * parens. */
                if (last_token_known) {

                    /* this is a global variable */
                    /* NOTE(review): prev_token is not restored on this path;
                     * presumably CParseParens() leaves the variable name
                     * there -- confirm */
                    COutputToken(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);
                }
                else {

                    /* this is a prototype, copy saved token back to
                     * prev_token, output and continue */
                    strcpy(token->prev_token, buf);
                    *(token->prev_char_location) = charloc;
                    *(token->prev_token_line) = tokenline;
                    *(token->prev_line_offset) = lineoffset;
                    COutputToken(token, token_buffer, ProtoType,
                                 outfile, infname, flags);
                }
                break;

            case '(':
                /* a second parenthesized group follows the first one */
                if (variable_seen) {

                    /* this is a variable declaration */
                    COutputCommaDelimitedToken(token, token_buffer,
                                               GlobalVariable,
                                               outfile, infname, flags);
                    CParseCommaDelimitedList(token, token_buffer,
                                             GlobalVariable,
                                             outfile, infname, flags);
                }
                else {

                    /* move to level zero again */
                    CToLevelZero(token, token_buffer, outfile,
                                 infname, flags);

                    /* obtain the next token */
                    token_found = CNextToken(token, token_buffer, outfile,
                                             infname, flags);

                    if (token_found) {

                        /* check if prototype, function or function pointer
                         * variable declaration */
                        switch (token->cur_token[0]) {
                            case '=':

                                /* this is a function pointer variable
                                 * declaration */
                                COutputCommaDelimitedToken(token,
                                                           token_buffer,
                                                           GlobalVariable,
                                                           outfile, infname,
                                                           flags);
                                CParseCommaDelimitedList(token, token_buffer,
                                                         GlobalVariable,
                                                         outfile, infname,
                                                         flags);
                                break;

                            case ';':

                                /* this is a prototype, output it */
                                COutputToken(token, token_buffer, ProtoType,
                                             outfile, infname, flags);
                                break;

                            default:

                                /* this is a function */
                                COutputToken(token, token_buffer, Function,
                                             outfile, infname, flags);

                                /* parse through function: find the opening
                                 * '{' of the body, then skip the body */
                                punctuator_found = TRUE;
                                while (token->cur_token[0] != '{' &&
                                       punctuator_found) {
                                    punctuator_found =
                                        CToPunctuator(token, token_buffer,
                                                      outfile, 
                                                      infname, flags);
                                }
                                if (punctuator_found) {
                                    CToLevelZero(token, token_buffer,
                                                 outfile, infname, flags);
                                }
                                break;
                        }
                    }
                }
                break;

            case '[':
            case '=':
            case ',':

                /* global variables */
                COutputCommaDelimitedToken(token, token_buffer,
                                           GlobalVariable,
                                           outfile, infname, flags);
                CParseCommaDelimitedList(token, token_buffer,
                                         GlobalVariable,
                                         outfile, infname, flags);
                break;

            default:

                /* this is a function, copy saved token back to prev_token,
                 * output and continue */
                strcpy(token->prev_token, buf);
                *(token->prev_char_location) = charloc;
                *(token->prev_token_line) = tokenline;
                *(token->prev_line_offset) = lineoffset;
                COutputToken(token, token_buffer, Function,
                             outfile, infname, flags);

                /* parse through function: find the opening '{' of the body,
                 * then skip the body */
                punctuator_found = TRUE;
                while (token->cur_token[0] != '{' && punctuator_found) {
                    punctuator_found = CToPunctuator(token, token_buffer,
                                                     outfile, infname, flags);
                }
                if (punctuator_found) {
                    CToLevelZero(token, token_buffer, outfile,
                                 infname, flags);
                }
                break;
        }
    }
}

/*----------------------------------------------------------------------------
 *
 * CParseNOP() will parse an as of yet unrecognized statement.  If I run into
 * a punctuator at this time then I have found either a structure declaration
 * (C++ 2.0), or a global variable declaration.  If the punctuator is '[',
 * ',', '=', or ';' then it is a global variable declaration.
If the * punctuator is a '{' then we have a structure declaration at this time we * should not run into any closing punctuators or syntax is in a bad way * ---------------------------------------------------------------------------*/ void CParseNOP(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { BOOLEAN token_found; switch (token->cur_token[0]) { case ';': case '=': case ',': case '[': /* global variables are here */ COutputCommaDelimitedToken(token, token_buffer, GlobalVariable, outfile, infname, flags); CParseCommaDelimitedList(token, token_buffer, GlobalVariable, outfile, infname, flags); token->extern_active = FALSE; token->CPP_extern_active = FALSE; token->static_active = FALSE; break; case '{': /* validate we are not in a C++ extern for C statements */ if (!token->CPP_extern_active) { /* this is a structure (C++ syntax) */ /* output it */ COutputToken(token, token_buffer, Structure, outfile, infname, flags); /* move through declaration */ CToLevelZero(token, token_buffer, outfile, infname, flags); /* get the next token */ token_found = CNextToken(token, token_buffer, outfile, infname, flags); /* if a token is available then output the list */ if (token_found) { CParseCommaDelimitedList(token, token_buffer, GlobalVariable, outfile, infname, flags); } token->extern_active = FALSE; token->static_active = FALSE; } break; case '(': CParseFunctionOrGlobalVariable(token, token_buffer, outfile, infname, flags); token->extern_active = FALSE; token->CPP_extern_active = FALSE; token->static_active = FALSE; break; case '"': if (!strcmp("\"C\"",token->cur_token)) token->CPP_extern_active = TRUE; break; default: /* true NOP */ break; } } /*---------------------------------------------------------------------------- * * CParseEnumerationConstants() will parse constants within an enumeration * declaration * ---------------------------------------------------------------------------*/ void CParseEnumerationConstants(Token *token, Buffer 
*token_buffer, FILE *outfile, char *infname, Flags *flags) { BOOLEAN punctuator_found; char open_brace[] = "({["; /* obtain the enumeration constants */ punctuator_found = TRUE; while (token->cur_token[0] != '}' && punctuator_found) { punctuator_found = CToPunctuator(token, token_buffer, outfile, infname, flags); if (punctuator_found) { switch (token->cur_token[0]) { case ',': case '=': /* this is one of the proper ending tokens for this type * of declaration list, so output it and parse to the * next correct punctuator */ COutputToken(token, token_buffer, EnumerationConstant, outfile, infname, flags); while (token->cur_token[0] != ',' && token->cur_token[0] != '}' && punctuator_found) { if (strchr(open_brace, token->cur_token[0])) { CToLevelZero(token, token_buffer, outfile, infname, flags); } punctuator_found = CToPunctuator(token, token_buffer, outfile, infname, flags); } break; default: break; } } } } /*---------------------------------------------------------------------------- * * CParseDeclarationStatement() will parse struct, enum and union * declarations. take the token just before the first punctuator, run * through the top level braces and parse for variables if the first * punctuator is a ';' then this is a global variable declaration, if the * first token[0] is a '{' then this is a global variable declaration. 
* ---------------------------------------------------------------------------*/
void CParseDeclarationStatement(Token * token, Buffer * token_buffer,
                                 SymbolType type, FILE * outfile,
                                 char *infname, Flags * flags)
{
    /* nothing to do when no punctuator follows the keyword */
    if (!CToPunctuator(token, token_buffer, outfile, infname, flags)) {
        return;
    }

    /* a paren means a function, prototype or parenthesized variable */
    if (token->cur_token[0] == '(') {
        CParseFunctionOrGlobalVariable(token, token_buffer, outfile,
                                       infname, flags);
        return;
    }

    /* a brace means this is truly an object declaration */
    if (token->cur_token[0] == '{') {

        /* tag the object itself unless token_count is 1, which marks a
         * plain variable declaration */
        if (token->token_count != 1) {
            COutputToken(token, token_buffer, type, outfile, infname, flags);
        }

        /* walk the body: enumerations have their constants tagged, anything
         * else is skipped as a whole */
        if (type == Enumeration) {
            CParseEnumerationConstants(token, token_buffer, outfile,
                                       infname, flags);
        }
        else {
            CToLevelZero(token, token_buffer, outfile, infname, flags);
        }

        /* whatever follows the body is a list of variables; the first entry
         * is not output separately here */
        if (!CNextToken(token, token_buffer, outfile, infname, flags)) {
            return;
        }
        CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);
        return;
    }

    /* any other declarator punctuator: a global variable list whose first
     * entry is the token just seen */
    if (token->cur_token[0] == ';' || token->cur_token[0] == '=' ||
        token->cur_token[0] == ',' || token->cur_token[0] == '[') {
        COutputCommaDelimitedToken(token, token_buffer, GlobalVariable,
                                   outfile, infname, flags);
        CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);
    }
}

/*----------------------------------------------------------------------------
 *
 * CParseTypeDefinition() parses the typedef statement.  take the token just
 * before the first *correct* punctuator, the ';', ',' or the '['.
Tag any
 * declarations being done here, get the next token
 *
 * ---------------------------------------------------------------------------*/
void CParseTypeDefinition(Token * token, Buffer * token_buffer,
                           FILE * outfile, char *infname, Flags * flags)
{
    BOOLEAN token_found;            /* result of the last CNextToken() */
    BOOLEAN parens_found = FALSE;   /* previous token was a paren group */
    BOOLEAN special_found = FALSE;  /* function typedef name already output */
    int token_count = 0;            /* tokens consumed after the first one */
    SymbolType tmptype;             /* type of the token after "typedef" */

    token_found = CNextToken(token, token_buffer, outfile, infname, flags);
    if (!token_found) {
        return;
    }

    /* check the type of the token for future use */
    tmptype = CTokenType(token->cur_token);

    /* walk the typedef up to the first ';', ',' or '[' */
    while (token_found && !special_found &&
           token->cur_token[0] != ';' &&
           token->cur_token[0] != ',' &&
           token->cur_token[0] != '[') {

        if (token->cur_token[0] == '{') {

            /* more than one token in front of the brace means a named
             * declaration; output the name, but only for enum, struct or
             * union */
            if (token_count > 1 &&
                (tmptype == Structure ||
                 tmptype == Enumeration ||
                 tmptype == Union)) {
                COutputToken(token, token_buffer, tmptype,
                             outfile, infname, flags);
            }

            /* pass through the body, tagging enumeration constants */
            if (tmptype == Enumeration) {
                CParseEnumerationConstants(token, token_buffer, outfile,
                                           infname, flags);
            }
            else {
                CToLevelZero(token, token_buffer, outfile, infname, flags);
            }
        }
        else if (token->cur_token[0] == '(') {

            if (parens_found) {

                /* a paren group directly after another paren group: this is
                 * a function typedef, so output the previous token and skip
                 * the argument list */
                COutputToken(token, token_buffer, TypeDefinition,
                             outfile, infname, flags);
                CToLevelZero(token, token_buffer, outfile, infname, flags);
                special_found = TRUE;
            }
            else {

                /* first paren group: move back to the top level and
                 * remember that the next paren identifies a token */
                CParseParens(token, token_buffer, outfile, infname, flags);
                parens_found = TRUE;

                /* swap to prevent loss of token */
                CTokenSwap(token);
            }
        }
        else {

            /* another token after a paren group means the group held
             * nothing special */
            parens_found = FALSE;
        }

        /* get another token */
        CTokenSwap(token);
        token_found = CNextToken(token, token_buffer, outfile,
                                 infname, flags);
        token_count++;
    }

    /* output the typedef names if appropriate */
    if (token_found && token->prev_token[0] != '}') {

        /* don't output the first token if already done */
        if (!special_found) {
            COutputCommaDelimitedToken(token, token_buffer, TypeDefinition,
                                       outfile, infname, flags);
        }

        /* parse through the rest of the typedef names */
        CParseCommaDelimitedList(token, token_buffer, TypeDefinition,
                                 outfile, infname, flags);
    }
}

/*----------------------------------------------------------------------------
 *
 * CParseClass() will parse the C++ class syntax.
take the token just before * the first '{', ',' or ':' and run through the top level braces if there * ---------------------------------------------------------------------------*/ void CParseClass(Token * token, Buffer * token_buffer, FILE * outfile, char *infname, Flags * flags) { BOOLEAN token_found; token_found = TRUE; while (token->cur_token[0] != '{' && token->cur_token[0] != ':' && token->cur_token[0] != ';' && token_found) { /* save the current token */ CTokenSwap(token); /* get the next token */ token_found = CNextToken(token, token_buffer, outfile, infname, flags); } /* output the class name */ if (token_found) { COutputToken(token, token_buffer, Class, outfile, infname, flags); /* parse through the remainder of the statement */ while (token->cur_token[0] != ';' && token_found) { if (token->cur_token[0] == '{') { /* move back to the zero level */ CToLevelZero(token, token_buffer, outfile, infname, flags); } token_found = CNextToken(token, token_buffer, outfile, infname, flags); } } } /*---------------------------------------------------------------------------- * * CTags() tags an input stream assuming standard ANSI 2.0 C/C++ syntax. * Long tokens are allowed, ANSI requires only 31 significant. 
* ---------------------------------------------------------------------------*/
void CTags(FILE * infile, char *infname, FILE * outfile, Flags * flags)
{
    Buffer input_state;             /* the file buffer and state, on stack */
    Token token_state;              /* the token state, on stack */
    Buffer *in = &input_state;      /* shorthand for the input buffer */
    Token *tok = &token_state;      /* shorthand for the token state */
    SymbolType type;                /* the type of the current token */
    BOOLEAN have_token;             /* set by CNextToken() */

    /* init the parser engine */
    CParserInit();

    tok->token_count = 0;

    /* the current token lives in the first set of buffers */
    tok->cur_token = tok->sbuf1;
    tok->cur_char_location = &(tok->charloc1);
    tok->cur_token_line = &(tok->tokenline1);
    tok->cur_line_offset = &(tok->lineoffset1);
    tok->sbuf1[0] = '\0';
    tok->charloc1 = 0;
    tok->tokenline1 = 1;
    tok->lineoffset1 = 0;

    /* the previous token lives in the second set of buffers */
    tok->prev_token = tok->sbuf2;
    tok->prev_char_location = &(tok->charloc2);
    tok->prev_token_line = &(tok->tokenline2);
    tok->prev_line_offset = &(tok->lineoffset2);
    tok->sbuf2[0] = '\0';
    tok->charloc2 = 0;
    tok->tokenline2 = 1;
    tok->lineoffset2 = 0;

    /* init the input buffer: empty, positioned at line 1 of infile */
    in->infile = infile;
    in->Cbuf[0] = '\0';
    in->buffer = in->Cbuf;
    in->token_char_location = 0;
    in->token_line_location = 1;
    in->token_line_offset = 0;

    /* init Extern and Static states */
    tok->extern_active = FALSE;
    tok->CPP_extern_active = FALSE;
    tok->static_active = FALSE;

    /* loop through the file one token at a time */
    have_token = CNextToken(tok, in, outfile, infname, flags);
    while (have_token) {

        /* classify the token and dispatch on its type */
        type = CTokenType(tok->cur_token);
        switch (type) {
            case Extern:
                tok->extern_active = TRUE;
                break;
            case Static:
                tok->static_active = TRUE;
                break;
            case NOP:
                CParseNOP(tok, in, outfile, infname, flags);
                break;
            case Structure:
            case Enumeration:
            case Union:
                CParseDeclarationStatement(tok, in, type,
                                           outfile, infname, flags);
                break;
            case TypeDefinition:
                CParseTypeDefinition(tok, in, outfile, infname, flags);
                break;
            case Class:
                CParseClass(tok, in, outfile, infname, flags);
                break;
            default:            /* not reached */
                break;
        }

        /* extern, static and NOP tokens leave the storage flags alone
         * (CParseNOP manages them itself); everything else clears them */
        if (!(type == Extern || type == Static || type == NOP)) {
            tok->extern_active = FALSE;
            tok->CPP_extern_active = FALSE;
            tok->static_active = FALSE;
        }

        /* swap state variables and get the next token */
        CTokenSwap(tok);
        have_token = CNextToken(tok, in, outfile, infname, flags);
    }
}